# -*- coding: utf-8 -*-

import re
import os
import time
from tqdm import tqdm
import urllib.request
import numpy as np
import pandas as pd
from cn2an import cn2an
try:
    from ..utils import get_file_path
    from ..lunar import TGDZ, get_tgdz_year
    from ..festival import FESTIVAL, FESTIVAL_LUNAR
    from ..xingzuo import get_xingzuo
    from .chengming import _get_lines
except (ImportError, ValueError, SystemError):
    # Relative imports fail when this file is executed directly as a script
    # (no parent package): ImportError on current Python 3, SystemError /
    # ValueError on older interpreters. Fall back to the absolute package
    # path. Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
    from chncal.utils import get_file_path
    from chncal.lunar import TGDZ, get_tgdz_year
    from chncal.festival import FESTIVAL, FESTIVAL_LUNAR
    from chncal.xingzuo import get_xingzuo
    from chncal.constant_creator.chengming import _get_lines
# https://www.cnblogs.com/adampei-bobo/p/6401924.html
# NOTE(review): the assignment below swaps the ssl module's default HTTPS
# context factory for the unverified one at import time, i.e. for the whole
# process and not only for the HKO download in this file. Presumably a
# workaround for certificate errors when fetching the HKO files -- confirm
# that skipping certificate verification globally is acceptable.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


# Source-code template of the generated constants module. The three `{}`
# placeholders are filled positionally with `str.format` in
# `create_hko_constant`, in the order sol_lun, lun_sol, sol_gz.
constants_template = """# -*- coding: utf-8
    
# this file is generated by:
# chncal.constant_creator.hko_calendar.create_hko_constant

from __future__ import absolute_import, unicode_literals


sol_lun = {}

lun_sol = {}

sol_gz = {}
"""


def _download_hko_calendar(year, force=False):
    """
    Download the Hong Kong Observatory calendar text file of one year.

    The file is stored as ``<year>.txt`` inside the directory returned by
    ``get_file_path('hko_calendar', 'data')``.

    Parameters
    ----------
    year : int or str
        Year to download; interpolated into the remote file name.
    force : bool
        If True, download even when the local file already exists.

    Raises
    ------
    Whatever ``urllib.request.urlretrieve`` raises on network/HTTP errors.
    In that case an already existing local file is left untouched.
    """
    save_dir = get_file_path('hko_calendar', 'data')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, '%s.txt' % year)
    if not force and os.path.exists(save_path):
        return
    url = 'https://www.hko.gov.hk/tc/gts/time/calendar/text/files/T%sc.txt'%year
    # Download to a temporary name first: an interrupted transfer must not
    # leave a truncated `<year>.txt` behind, because its mere existence makes
    # later calls treat the year as already downloaded.
    tmp_path = save_path + '.part'
    try:
        urllib.request.urlretrieve(url, tmp_path)
        os.replace(tmp_path, save_path)
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def download_hko_calendar(start_year=1901, end_year=2100, force=False):
    """
    Download the Gregorian/lunar conversion tables published by the
    Hong Kong Observatory for every year in [start_year, end_year]:
    https://www.hko.gov.hk/tc/gts/time/conversion1_text.htm

    Further astronomical-calendar resources:
    ShouXing astronomical calendar: http://www.nongli.net/sxwnl/
    sxtwl: https://github.com/yuangu/sxtwl_cpp
           https://gitee.com/yuangu/sxtwl

    Parameters
    ----------
    start_year, end_year : int
        Inclusive range of years to fetch.
    force : bool
        Forwarded to `_download_hko_calendar`; re-download existing files.
    """
    years = range(start_year, end_year + 1)
    print('downloading hko calendar...')
    # Short pauses around the progress bar; presumably they keep the printed
    # message and the tqdm output from interleaving.
    time.sleep(0.2)
    progress = tqdm(years)
    with progress:
        for current in progress:
            _download_hko_calendar(current, force=force)
            progress.set_description('{}'.format(current))
    time.sleep(0.2)


def load_hko_calendar(year):
    """
    Parse the downloaded HKO calendar text file of one year.

    Each line is whitespace-split into fields, lines with fewer than three
    fields are dropped, the text is converted from traditional to simplified
    Chinese, and lines containing '香港' or '对照表' are removed. The first
    remaining line is used as the column header, the rest as data rows.

    Parameters
    ----------
    year : int or str
        Year whose ``<year>.txt`` file should be loaded.

    Returns
    -------
    pandas.DataFrame
        The parsed table. An empty frame with the columns
        ['公历日期', '农历日期', '星期', '节气'] is returned when the file
        is missing or contains no usable rows.
    """
    empty_columns = ['公历日期', '农历日期', '星期', '节气']
    save_dir = get_file_path('hko_calendar', 'data')
    fpath = os.path.join(save_dir, '%s.txt'%year)
    if not os.path.exists(fpath):
        print('香港天文台%s年日历数据未找到！'%year)
        return pd.DataFrame(columns=empty_columns)
    with open(fpath, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    # Collapse every run of whitespace into one comma so fields can be split.
    lines = [re.sub(r'\s+', ',', x.strip()) for x in lines]
    lines = [x for x in lines if len(x.split(',')) >= 3]
    # Imported lazily so the module can be imported without dramkit.
    from dramkit.other import traditional2simplified
    lines = [traditional2simplified(x) for x in lines]
    lines = [x for x in lines if '香港' not in x and '对照表' not in x]
    rows = [x.split(',') for x in lines]
    if not rows:
        # e.g. an empty or malformed download: there is no header row to use,
        # so behave like the missing-file case instead of raising IndexError.
        print('香港天文台%s年日历数据未找到！'%year)
        return pd.DataFrame(columns=empty_columns)
    return pd.DataFrame(rows[1:], columns=rows[0])

    
def get_and_merge_hko(start_year=1901, end_year=2100, force_download=False):
    """
    Download the yearly HKO calendar files and merge them into one table.

    The merged table is cached as ``hko_calendar.csv`` (gbk-encoded) in the
    directory returned by ``get_file_path('hko_calendar', 'data')``.

    NOTE: when the cached CSV already exists it is read and returned as-is,
    regardless of `start_year` / `end_year`.

    Parameters
    ----------
    start_year, end_year : int
        Inclusive range of years to download and merge.
    force_download : bool
        Re-download yearly files even if they already exist locally.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the yearly tables (or the cached CSV content).
    """
    # Forward the requested range; previously the download always used its
    # own defaults and ignored start_year / end_year.
    download_hko_calendar(start_year=start_year, end_year=end_year,
                          force=force_download)
    save_dir = get_file_path('hko_calendar', 'data')
    save_name = 'hko_calendar.csv'
    save_path = os.path.join(save_dir, save_name)
    print('\nloading hko calendar...')
    if os.path.exists(save_path):
        print('hko calendar data: `{}` already exists.'.format(save_name))
        return pd.read_csv(save_path, encoding='gbk')

    time.sleep(0.2)
    frames = []
    with tqdm(range(start_year, end_year+1)) as pbar:
        for year in pbar:
            pbar.set_description(str(year))
            frames.append(load_hko_calendar(year))
    data = pd.concat(frames, axis=0)
    data.reset_index(drop=True, inplace=True)
    data.to_csv(save_path, index=False, encoding='gbk')
    return data


def _date_reformat_chn(date, joiner='.'):
    """
    Reformat a date written as ``x年x月x日``.

    The Chinese year/month/day markers are stripped, every component is
    left-padded with zeros to at least two characters, and the components
    are joined with `joiner`.
    """
    # '年' and '月' act as separators, '日' is simply dropped.
    markers = str.maketrans({'年': '-', '月': '-', '日': None})
    pieces = date.translate(markers).split('-')
    return joiner.join(map(lambda piece: piece.zfill(2), pieces))


def get_and_handle_hko(force_download=False):
    """
    Download the Hong Kong Observatory calendar data and derive the
    lunar-date, festival, zodiac-sign and sexagenary (GanZhi) columns.

    The result is cached as ``hko_calendar_handle.csv`` (gbk-encoded) in the
    directory returned by ``get_file_path('hko_calendar', 'data')``. If that
    file already exists it is read and returned without any recomputation.

    Parameters
    ----------
    force_download : bool
        Forwarded to `get_and_merge_hko`; re-download the yearly files.

    Returns
    -------
    pandas.DataFrame
        The merged HKO table plus the derived columns, among them 'date',
        '农历date', '公历节日', '农历节日', '星座', '干支年', '干支月',
        '干支日' and '干支date'.
    """
    save_dir = get_file_path('hko_calendar', 'data')
    save_path = os.path.join(save_dir, 'hko_calendar_handle.csv')
    if os.path.exists(save_path):
        df = pd.read_csv(save_path, encoding='gbk')
        return df

    df = get_and_merge_hko(start_year=1901, end_year=2100,
                           force_download=force_download)

    # Gregorian and lunar dates ===============================================
    df['date'] = df['公历日期'].apply(lambda x: _date_reformat_chn(x, '.'))
    df.sort_values('date', ascending=True, inplace=True)
    df['year'] = df['date'].apply(lambda x: x[:4])
    # Rows whose lunar field contains '月' carry the month name; they are
    # treated as the first day ('初一') of that month.
    df['农历日'] = df['农历日期'].apply(lambda x: '初一' if '月' in x else x)
    df['农历d'] = df['农历日'].apply(lambda x: cn2an(x.replace('初', '')))
    df['农历月'] = df['农历日期'].apply(lambda x: x if '月' in x else np.nan)
    # Series.ffill() replaces the deprecated fillna(method='ffill'); it needs
    # no pandas version check (the old string comparison of version numbers
    # was also unreliable, e.g. '10.0.0' < '2.1.0').
    df['农历月'] = df['农历月'].ffill()
    # January 1901 falls in the 11th lunar month
    df['农历月'] = df['农历月'].fillna('十一月')
    df['农历m'] = df['农历月'].apply(lambda x: cn2an(
        x.replace('正', '一').replace('闰', '').replace('月', '')))
    # The lunar year switches on the rows of the first month ('正月').
    df['农历y'] = df[['year', '农历月']].apply(lambda x:
                    x['year'] if x['农历月'] == '正月' else np.nan,
                    axis=1)
    df['农历y'] = df['农历y'].ffill()
    # Rows before the first '正月' belong to lunar year 1900.
    df['农历y'] = df['农历y'].fillna(str(1900))
    df['农历月'] = df['农历月'].apply(lambda x:
                    x.replace('十一月', '冬月').replace('十二月', '腊月'))
    df['农历date_'] = df['农历y']+'年'+df['农历月']+df['农历日']
    df['农历date'] = df['农历y'] + '.' + \
                     df['农历m'].apply(lambda x: str(x).zfill(2)) + '.' + \
                     df['农历d'].apply(lambda x: str(x).zfill(2))
    # Mark dates inside a leap month with a trailing '闰'.
    df['农历date'] = df[['农历date', '农历date_']].apply(lambda x:
                     x['农历date']+'闰' if '闰' in x['农历date_'] else x['农历date'],
                     axis=1)
    # TODO: determine whether a lunar month is long or short

    # Gregorian and lunar festivals ===========================================
    df['公历节日'] = df['date'].apply(lambda x: FESTIVAL[x[-5:]] \
                    if x[-5:] in FESTIVAL else np.nan)
    df['农历节日'] = df['农历date_'].apply(lambda x: FESTIVAL_LUNAR[x[-4:]] \
                    if x[-4:] in FESTIVAL_LUNAR and '闰' not in x else np.nan)
    # New Year's Eve ('除夕') is handled separately: it is the row directly
    # before Spring Festival ('春节').
    df['tmp'] = df['农历节日'].shift(-1)
    df['农历节日'] = df[['农历节日', 'tmp']].apply(lambda x:
                    '除夕' if x['tmp'] == '春节' else x['农历节日'], axis=1)
    df.drop('tmp', axis=1, inplace=True)

    # Zodiac sign =============================================================
    df['星座'] = df['date'].apply(
                    lambda x: get_xingzuo(x[-5:].replace('.', ''), is_md=True))

    # GanZhi year =============================================================
    # Alternative (disabled): split years by lunar year:
    # df['干支年'] = df['农历y'].apply(get_tgdz_year)

    # Split years by solar term: per the original author's note, the zodiac
    # year apparently changes at '立春' rather than at lunar new year.
    gz_y = df[df['节气'] == '立春'][['date', '节气']].copy()
    gz_y['干支y'] = gz_y['date'].apply(lambda x: x[:4])
    df = pd.merge(df, gz_y[['date', '干支y']], how='left', on='date')
    df['干支y'] = df['干支y'].ffill().bfill()
    df['干支年'] = df['干支y'].apply(get_tgdz_year)

    # GanZhi month ============================================================
    # Anchor (original author's note): 2018.12.07 ('大雪', first day of the
    # 11th lunar month of 2018) starts a '甲子' month, i.e. TGDZ[0].
    solars = df[~df['节气'].isna()][['date', '节气']].copy()
    solars.loc[solars['date'] == '2018.12.07', '干支月'] = TGDZ[0]
    solars.reset_index(drop=True, inplace=True)
    i0 = solars[solars['date'] == '2018.12.07'].index[0]
    # From the anchor onwards: the index advances every two solar-term rows.
    tmp1 = solars[solars.index >= i0].copy()
    tmp1['i'] = (tmp1.index - i0) // 2 % 60
    tmp1['干支月'] = tmp1['i'].apply(lambda x: TGDZ[x])
    # Before the anchor: count backwards and index TGDZ from its end.
    tmp2 = solars[solars['date'] <= '2018.12.07'].copy()
    tmp2['i'] = (i0 - tmp2.index) // 2 % 60
    tmp2['i'] = tmp2['i'].shift(1)
    # The shift leaves the first row empty; derive its value from the two
    # rows that follow it.
    _0, _1 = tmp2['i'].dropna().iloc[:2]
    if _0 == _1:
        _0_ = int(_0+1) if int(_0) != 59 else 0
    else:
        _0_ = int(_0)
    # Assign back instead of chained `inplace=True`, which does not update
    # the frame under pandas copy-on-write.
    tmp2['i'] = tmp2['i'].fillna(_0_)
    tmp2['干支月'] = tmp2['i'].apply(lambda x: TGDZ[-int(x)])
    # Drop the last row of tmp2: the anchor row is already part of tmp1.
    solars = pd.concat((tmp2.iloc[:-1, :], tmp1), axis=0)
    df = pd.merge(df, solars[['date', '干支月']], how='left', on='date')
    df['干支月'] = df['干支月'].ffill()
    # Rows before the first solar term get the entry preceding the first
    # known one in TGDZ.
    first = df['干支月'].dropna().iloc[0]
    df['干支月'] = df['干支月'].fillna(TGDZ[TGDZ.index(first)-1])

    # GanZhi day ==============================================================
    # Anchor (original author's note): 2022.07.10 (12th day of the 6th lunar
    # month of 2022) is a '甲子' day.
    datebase = pd.to_datetime('2022.07.10')
    df['干支日'] = pd.to_datetime(df['date']) - datebase
    df['干支日'] = df['干支日'].apply(lambda x: x.days)
    df['干支日'] = df['干支日'].apply(lambda x: TGDZ[x % 60] \
                    if x >= 0 else TGDZ[-(abs(x) % 60)])
    df['干支date'] = df['干支年']+'年,'+df['干支月']+'月,'+df['干支日']+'日'

    df.to_csv(save_path, index=False, encoding='gbk')

    return df


def create_hko_constant():
    """
    Generate the ``hko_calendar.py`` constants module.

    Three lookup tables are built from the processed HKO data (Gregorian ->
    lunar date, lunar -> Gregorian date, Gregorian -> GanZhi date), rendered
    with `_get_lines`, inserted into `constants_template` and written as
    UTF-8 to the path returned by ``get_file_path('hko_calendar.py')``.
    """
    table = get_and_handle_hko()[['date', '农历date', '干支date']]

    by_solar = table.set_index('date')
    mappings = (
        by_solar['农历date'].to_dict(),                   # sol_lun
        table.set_index('农历date')['date'].to_dict(),    # lun_sol
        by_solar['干支date'].to_dict(),                   # sol_gz
    )

    # One rendered text block per mapping, in template placeholder order.
    rendered = ['\n'.join(_get_lines(mapping)) for mapping in mappings]
    module_source = constants_template.format(*rendered)

    target = get_file_path('hko_calendar.py')
    with open(target, 'wb') as out:
        out.write(module_source.encode('utf-8'))


# When executed as a script, regenerate the constants module.
if __name__ == '__main__':
    create_hko_constant()
